#############################
# merge the source data frames into one long data frame
#############################

## Wave 1: merge the person file (fgzw1p) with the household file (fgzw1h)

# Column names present in both files, in the order they appear in fgzw1p.
dublicates <- intersect(names(fgzw1p), names(fgzw1h))

# Shared names at positions 1, 2 and 4 stay in fgzw1h; every other shared
# column is dropped from it below.
# NOTE(review): presumably positions 1, 2 and 4 are the merge keys hid, cid
# and wave -- confirm against the column order of fgzw1p.
# Negative indexing selects the same names as c(x[3], x[5:length(x)]) but does
# not count backwards (and inject NA) when fewer than five names are shared.
dublicates <- dublicates[-c(1, 2, 4)]

fgzw1p <- arrange(fgzw1p, hid)

# Remove the redundant shared columns from the household file.
fgzw1h <- fgzw1h %>%
  arrange(hid) %>%
  select(-all_of(dublicates))

# Full outer join: unmatched rows from either side are kept.
w1_long <- merge(fgzw1p, fgzw1h, by = c("hid", "cid", "wave"), all = TRUE)

# Attach wave 1 household income, size and wealth (combined over both parts
# of wave 1) to every row of w1_long.

# Recode negative values to NA in every column, then recode hwealth 9 to 0.
fgzw1h <- fgzw1h %>%
  mutate(across(everything(), ~ ifelse(.x < 0, NA, .x))) %>%
  mutate(hwealth = case_when(hwealth == 9 ~ 0,
                             TRUE ~ hwealth))

hinc <- fgzw1h %>%
  # One column per part of wave 1; rows from the other part are NA.
  mutate(
    hhhincnet_w1t1 = case_when(wave == "W1T1" ~ hhhincnet, TRUE ~ NA),
    hhhincnet_w1t2 = case_when(wave == "W1T2" ~ hhhincnet, TRUE ~ NA),
    hhhsize_w1t1 = case_when(wave == "W1T1" ~ hhhsize, TRUE ~ NA),
    hhhsize_w1t2 = case_when(wave == "W1T2" ~ hhhsize, TRUE ~ NA)
  ) %>%
  group_by(hid) %>%
  # Spread each value over all rows of the household. A household without any
  # valid value gets NaN here, which is.na() below treats as missing.
  mutate(
    hhhincnet_w1t1 = mean(hhhincnet_w1t1, na.rm = TRUE),
    hhhincnet_w1t2 = mean(hhhincnet_w1t2, na.rm = TRUE),
    hwealth_w1 = mean(hwealth, na.rm = TRUE),
    hhhsize_w1t1 = mean(hhhsize_w1t1, na.rm = TRUE),
    hhhsize_w1t2 = mean(hhhsize_w1t2, na.rm = TRUE)
  ) %>%
  mutate(
    # Income: use whichever part is available; if both are available, use the
    # lower one. If neither is available no branch matches and the result
    # is NA.
    hhhincnet_w1 = case_when(
      is.na(hhhincnet_w1t1) & !is.na(hhhincnet_w1t2) ~ hhhincnet_w1t2,
      !is.na(hhhincnet_w1t1) & is.na(hhhincnet_w1t2) ~ hhhincnet_w1t1,
      hhhincnet_w1t1 < hhhincnet_w1t2 ~ hhhincnet_w1t1,
      hhhincnet_w1t1 > hhhincnet_w1t2 ~ hhhincnet_w1t2,
      hhhincnet_w1t1 == hhhincnet_w1t2 ~ hhhincnet_w1t1
    ),
    # Household size: use part 1, fall back to part 2 if part 1 is missing.
    hhhsize_w1 = ifelse(!is.na(hhhsize_w1t1), hhhsize_w1t1, hhhsize_w1t2)
  ) %>%
  ungroup() %>%
  select(hid, cid, hhhincnet_w1, hhhsize_w1, hwealth_w1, wave)

# Full outer join of the derived household values onto the wave 1 data.
w1_long <- merge(w1_long, hinc, by = c("hid", "cid", "wave"), all = TRUE)

## hgen: add the hgen data frame to the wave 1 data

# Column names present in both data frames, in w1_long order.
dublicates <- intersect(names(w1_long), names(hgen))

# Keep the first three shared names in hgen and drop the rest from it.
# merge() puts its `by` columns first, so w1_long starts with hid, cid, wave;
# the first three shared names are therefore the merge keys.
# Negative indexing equals x[4:length(x)] but yields an empty vector instead
# of counting backwards when only the three keys are shared.
dublicates <- dublicates[-c(1, 2, 3)]

hgen <- hgen %>%
  arrange(hid) %>%
  select(-all_of(dublicates))

# Full outer join: unmatched rows from either side are kept.
w1_long <- merge(w1_long, hgen, by = c("hid", "cid", "wave"), all = TRUE)

## hhrf: add the hhrf data frame to the wave 1 data

# Column names present in both data frames. Nothing is dropped from hhrf in
# this step, so the vector is informational only.
# NOTE(review): any shared column other than hid and wave will appear twice
# after the merge, with .x / .y suffixes -- confirm this is intended.
dublicates <- intersect(names(w1_long), names(hhrf))

hhrf <- hhrf %>%
  arrange(hid)

# Full outer join: unmatched rows from either side are kept.
w1_long <- merge(w1_long, hhrf, by = c("hid", "wave"), all = TRUE)

## phrf: add the phrf data frame to the wave 1 data

# Column names present in both data frames, in w1_long order.
dublicates <- intersect(names(w1_long), names(phrf))

# Keep the first two shared names in phrf and drop the rest from it.
# NOTE(review): presumably the first two shared names are the merge keys
# wave and pid -- confirm against the columns of phrf.
# Negative indexing equals x[3:length(x)] but yields an empty vector instead
# of counting backwards when only two names are shared.
dublicates <- dublicates[-c(1, 2)]

phrf <- phrf %>%
  arrange(pid) %>%
  select(-all_of(dublicates))

# Full outer join: unmatched rows from either side are kept.
w1_long <- merge(w1_long, phrf, by = c("pid", "wave"), all = TRUE)

## wave 2: merge the person file (fgzw2p) with the household file (fgzw2h)

# Column names present in both files, in the order they appear in fgzw2p.
dublicates <- intersect(names(fgzw2p), names(fgzw2h))

# Keep the second shared name in fgzw2h and drop every other shared column.
# NOTE(review): presumably the second shared name is the merge key hid --
# confirm against the column order of fgzw2p.
# Negative indexing equals c(x[1], x[3:length(x)]) but does not count
# backwards when fewer than three names are shared.
dublicates <- dublicates[-2]

fgzw2p <- arrange(fgzw2p, hid)
fgzw2h <- fgzw2h %>%
  arrange(hid) %>%
  select(-all_of(dublicates))

# Full outer join: unmatched rows from either side are kept.
w2_long <- merge(fgzw2p, fgzw2h, by = "hid", all = TRUE)

# fgzw2hhrf: add the fgzw2hhrf data frame to the wave 2 data

# Column names present in both data frames, in w2_long order. w2_long comes
# from merge(by = "hid"), which puts hid first, so hid is the first shared
# name.
dublicates <- intersect(names(w2_long), names(fgzw2hhrf))

# Only the second shared name is dropped from fgzw2hhrf; hid is kept as key.
# NOTE(review): if more than two names are shared, the remaining ones appear
# twice after the merge with .x / .y suffixes -- confirm this is intended.
dublicates <- dublicates[2]

fgzw2hhrf <- fgzw2hhrf %>%
  arrange(hid) %>%
  select(-all_of(dublicates))

# Full outer join: unmatched rows from either side are kept.
w2_long <- merge(w2_long, fgzw2hhrf, by = "hid", all = TRUE)

# fgzw2phrf: add the fgzw2phrf data frame to the wave 2 data

# Column names present in both data frames, in w2_long order.
dublicates <- intersect(names(w2_long), names(fgzw2phrf))

# Keep the second shared name in fgzw2phrf and drop every other shared column.
# NOTE(review): presumably the second shared name is the merge key pid --
# confirm against the column order of w2_long.
# Negative indexing equals c(x[1], x[3:length(x)]) but does not count
# backwards when fewer than three names are shared.
dublicates <- dublicates[-2]

# Sorting by hid happens before select(), which may remove hid.
fgzw2phrf <- fgzw2phrf %>%
  arrange(hid) %>%
  select(-all_of(dublicates))

# Full outer join: unmatched rows from either side are kept.
w2_long <- merge(w2_long, fgzw2phrf, by = "pid", all = TRUE)

# Label every wave 2 row.
w2_long <- w2_long %>%
  mutate(wave = "W2")

## wave 3

# Label every wave 3 row.
fgzw3p <- mutate(fgzw3p, wave = "W3")

# Stack the three waves, sort by person id and return a plain data.frame.
fgz_long <- as.data.frame(
  arrange(bind_rows(w1_long, w2_long, fgzw3p), pid)
)

#############################
# set missings to NA
#############################

# Every value below zero, in every column, is replaced by NA.
# across(everything(), ...) is the current replacement for the superseded
# mutate_all() and applies the same function to the same columns.
fgz_long <- fgz_long %>%
  mutate(across(everything(), ~ ifelse(.x < 0, NA, .x)))

#############################
# dummies for waves
#############################

# wave1_2_3:   1 / 2 / 3 for W1T1 / W2 / W3, NA for W1T2 and anything else.
# wave1_3:     0 for W1T1, 1 for W3, NA otherwise.
# wave1_dummy: 1 for W1T1, NA if wave is missing, 0 otherwise.
# wave3_dummy: 1 for W3,   NA if wave is missing, 0 otherwise.
fgz_long <- fgz_long %>%
  mutate(
    wave1_2_3 = case_when(wave == "W1T1" ~ 1,
                          wave == "W1T2" ~ NA_real_,
                          wave == "W2" ~ 2,
                          wave == "W3" ~ 3),
    wave1_3 = case_when(wave == "W1T1" ~ 0,
                        wave == "W1T2" ~ NA_real_,
                        wave == "W2" ~ NA_real_,
                        wave == "W3" ~ 1),
    # `wave == NA` always evaluates to NA and never matches, which sent rows
    # with a missing wave to the 0 branch; is.na() detects them correctly.
    wave1_dummy = case_when(wave == "W1T1" ~ 1,
                            is.na(wave) ~ NA_real_,
                            TRUE ~ 0),
    wave3_dummy = case_when(wave == "W3" ~ 1,
                            is.na(wave) ~ NA_real_,
                            TRUE ~ 0)
  )



#############################
# delete df's which we do not need

# Remove the intermediate objects by name from the current environment.
rm(list = c(
  "fgzw1h", "fgzw1p",
  "hgen", "pgen",
  "hhrf", "phrf",
  "fgzw2h", "fgzw2p",
  "fgzw2hhrf", "fgzw2phrf",
  "w1_long", "w2_long",
  "fgzvor", "fgzw3p",
  "dublicates", "hinc"
))

